import time

from bs4 import BeautifulSoup
import lxml
import requests
import chardet
import xlwt
from xlwt import Worksheet


def request_douban(url, timeout=10):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        The decoded response body, or ``None`` when the request could not be
        completed or the server answered with an HTTP error status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    # Transport-level failures (DNS, refused connection, timeout) happen
    # before any response object exists, so they are reported separately
    # from HTTP status errors.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as err:
        print(f"请求失败：{err}")
        return None

    try:
        # Raises HTTPError for 4xx/5xx status codes.
        response.raise_for_status()
    except requests.HTTPError:
        print(f"请求失败，收到状态码：{response.status_code}")
        return None

    return response.text

def save_to_excel(sheet:Worksheet,soup:BeautifulSoup)->None:
    """Extract every movie entry from a parsed listing page into *sheet*.

    Each ``<li>`` inside the element with class ``grid_view`` is treated as
    one movie. The movie's rank (the ``<em>`` inside ``div.pic``) is used as
    the row number, and the columns are written in this order: name, image
    URL, rank, score, author/credits paragraph, one-line introduction.

    Args:
        sheet: Worksheet that receives one row per movie.
        soup: Parsed HTML of a single listing page.
    """
    grid = soup.find(class_="grid_view")
    if grid is None:
        # The page has no movie list (for example an error or verification
        # page), so there is nothing to extract.
        print('未找到电影列表，跳过该页')
        return

    for item in grid.find_all('li'):
        item_name = item.find(class_="title").string
        item_img = item.find('a').find('img').get('src')
        item_index = item.find('div', class_='pic').find('em').string
        bd = item.find("div", class_="bd")
        item_score = bd.find("div", class_="star").find("span", class_="rating_num").get_text()
        item_author = bd.find("p").get_text(strip=True)

        # Not every entry has an introduction; fall back to an empty string
        # both when the tag is absent and when it has no single text child.
        inq = bd.find("span", class_='inq')
        item_intr = (inq.string if inq is not None else None) or ""

        print(f'爬取电影：{item_index} | {item_name} | {item_img} | {item_score} | {item_author} | {item_intr}')

        # The rank doubles as the row number; row 0 is left for the header.
        row = int(item_index)
        columns = (item_name, item_img, item_index, item_score, item_author, item_intr)
        for col, value in enumerate(columns):
            sheet.write(row, col, value)




def process_page(sheet:Worksheet, page:int):
    """Download one listing page and write its movies into *sheet*.

    Args:
        sheet: Worksheet that receives the extracted rows.
        page: Zero-based page number; it is multiplied by 25 to build the
            ``start`` offset in the request URL.
    """
    url = f'https://movie.douban.com/top250?start={page*25}&filter='
    html = request_douban(url)
    if html is None:
        # The download failed (request_douban already reported why); skip
        # this page instead of handing None to the HTML parser.
        return
    soup = BeautifulSoup(html,'lxml')
    save_to_excel(sheet, soup)


if __name__ == '__main__':
    # Script entry point: build the workbook, write the header row, scrape
    # all ten listing pages, then save the result to disk.
    book = xlwt.Workbook(encoding='utf-8', style_compression=0)
    sheet = book.add_sheet('豆瓣电影Top250', cell_overwrite_ok=True)

    # Row 0 holds the column titles; movie rows start at row 1.
    header = ('名称', '图片', '排名', '评分', '作者', '简介')
    for col, title in enumerate(header):
        sheet.write(0, col, title)

    # Ten pages in total; process_page turns the page number into an offset.
    for page in range(10):
        process_page(sheet, page)

    # xlwt column widths are in units of 1/256 of a character, so these two
    # text-heavy columns are made roughly 100 characters wide.
    sheet.col(4).width = 256 * 100
    sheet.col(5).width = 256 * 100

    # xlwt writes the legacy binary .xls format, so the file must carry the
    # .xls extension; the same bytes named .xlsx are rejected by Excel.
    book.save('豆瓣最受欢迎250部电影.xls')
